load(file = paste0(IO$output_data, "users.Rdata"), verbose = TRUE)## Loading objects:
## users
load(file = paste0(IO$output_data, "cycles.Rdata"), verbose = TRUE)## Loading objects:
## cycles
# average app usage duration
agg_start = aggregate(cycle_start ~ user_id, cycles, min)
colnames(agg_start) = c("user_id", "start_first_cycle")
agg_end = aggregate(cycle_start+cycle_length ~ user_id, cycles, max)
colnames(agg_end) = c("user_id", "end_last_cycle")
agg = merge(agg_start, agg_end)
m = match(users$user_id, agg$user_id)
agg = agg[m,]
agg$app_duration_in_days = as.numeric(agg$end_last_cycle - agg$start_first_cycle)
agg$app_duration_in_years = agg$app_duration_in_days / 365ju = which(users$BC != "unclear")
jc = which(cycles$BC != "unclear")
# Number of users
df = data.frame(field = "Number of users",original_dataset = nrow(users), filtered_dataset = sum(users$BC != "unclear"))
# Number of cycles
df = rbind(df,
data.frame(field = "Number of cycles",original_dataset = nrow(cycles), filtered_dataset = sum(cycles$BC != "unclear")))
# Number of breast tenderness symptoms
df = rbind(df,
data.frame(field = "Number of breast tenderness symptoms",original_dataset = sum(cycles$n_TB), filtered_dataset = sum(cycles$n_TB[jc])))
# average app usage duration
df = rbind(df,
data.frame(field = "Average app usage duration (in year)",
original_dataset = round(mean(agg$app_duration_in_years),2),
filtered_dataset = round(mean(agg$app_duration_in_years[ju]),2)))
df = rbind(df,
data.frame(field = "Average app usage duration (sd)",
original_dataset = round(sd(agg$app_duration_in_years),2),
filtered_dataset = round(sd(agg$app_duration_in_years[ju]),2)))
# average app number of cycles
df = rbind(df,
data.frame(field = "Average # of cycles per users",
original_dataset = round(mean(users$n_cycles),2),
filtered_dataset = round(mean(users$n_cycles[ju]),2)))
df = rbind(df,
data.frame(field = "Average # of cycles (sd)",
original_dataset = round(sd(users$n_cycles),2),
filtered_dataset = round(sd(users$n_cycles[ju]),2)))
# average number of breast tenderness per users
df = rbind(df,
data.frame(field = "Average # of breast tenderness per users",
original_dataset = round(mean(users$n_tender_breasts),2),
filtered_dataset = round(mean(users$n_tender_breasts[ju]),2)))
df = rbind(df,
data.frame(field = "Median # of breast tenderness",
original_dataset = round(median(users$n_tender_breasts),2),
filtered_dataset = round(median(users$n_tender_breasts[ju]),2)))
df = rbind(df,
data.frame(field = "Average # of breast tenderness (sd)",
original_dataset = round(sd(users$n_tender_breasts),2),
filtered_dataset = round(sd(users$n_tender_breasts[ju]),2)))
# average number of breast tenderness per cycles
df = rbind(df,
data.frame(field = "Average # of breast tenderness per cycle",
original_dataset = round(mean(cycles$n_TB),2),
filtered_dataset = round(mean(cycles$n_TB[jc]),2)))
df = rbind(df,
data.frame(field = "Average # of breast tenderness per cycle (sd) ",
original_dataset = round(sd(cycles$n_TB),2),
filtered_dataset = round(sd(cycles$n_TB[jc]),2)))
# percentage of cycles per BC (original)
table_BC_CLUE_o = table(cycles$birth_control_CLUE)
table_BC_CLUE_o_perc = round(100*table_BC_CLUE_o/nrow(cycles),2)
table_BC_CLUE_f = table(cycles$birth_control_CLUE[jc])
table_BC_CLUE_f_perc = round(100*table_BC_CLUE_f/length(jc),2)
df = rbind(df,
data.frame(field = paste0("% of cycles per BC as logged by the users (",names(table_BC_CLUE_o_perc),")"),
original_dataset = as.numeric(table_BC_CLUE_o_perc),
filtered_dataset = as.numeric(table_BC_CLUE_f_perc)))
# percentage of cycles per BC (re-assigned)
table_BC_o = table(cycles$BC)
table_BC_o_perc = round(100*table_BC_o/nrow(cycles),2)
table_BC_f = table(cycles$BC[jc])
table_BC_f_perc = round(100*table_BC_f/length(jc),2)
df = rbind(df,
data.frame(field = paste0("% of cycles per BC as re-assigned (",names(table_BC_o_perc),")"),
original_dataset = as.numeric(table_BC_o_perc),
filtered_dataset = c(as.numeric(table_BC_f_perc),0)))
df## field
## 1 Number of users
## 2 Number of cycles
## 3 Number of breast tenderness symptoms
## 4 Average app usage duration (in year)
## 5 Average app usage duration (sd)
## 6 Average # of cycles per users
## 7 Average # of cycles (sd)
## 8 Average # of breast tenderness per users
## 9 Median # of breast tenderness
## 10 Average # of breast tenderness (sd)
## 11 Average # of breast tenderness per cycle
## 12 Average # of breast tenderness per cycle (sd)
## 13 % of cycles per BC as logged by the users (none / condoms)
## 14 % of cycles per BC as logged by the users (pill)
## 15 % of cycles per BC as logged by the users (did not enter)
## 16 % of cycles per BC as re-assigned (none / condoms)
## 17 % of cycles per BC as re-assigned (pill)
## 18 % of cycles per BC as re-assigned (unclear)
## original_dataset filtered_dataset
## 1 216484.00 210529.00
## 2 3686977.00 3056348.00
## 3 4530662.00 4132033.00
## 4 1.54 1.55
## 5 0.65 0.65
## 6 17.03 17.15
## 7 7.45 7.48
## 8 20.93 21.27
## 9 8.00 8.00
## 10 36.06 36.43
## 11 1.23 1.35
## 12 2.94 3.12
## 13 28.42 31.50
## 14 39.08 39.29
## 15 32.50 29.21
## 16 48.94 59.04
## 17 33.95 40.96
## 18 17.10 0.00
write.csv(df, file = paste0(IO$tables,"Table_1.csv") , row.names = FALSE)table_n = table(cycles$birth_control_CLUE, cycles$BC)
table_perc = round( 100*table_n/apply(table_n, 1, sum) , 2)
rownames(table_n) = paste0(rownames(table_n), ": n cycles")
table_n##
## none / condoms pill unclear
## none / condoms: n cycles 895215 67507 85295
## pill: n cycles 215958 984776 240084
## did not enter: n cycles 693365 199527 305250
rownames(table_perc) = paste0(rownames(table_perc), ": % cycles")
table_perc##
## none / condoms pill unclear
## none / condoms: % cycles 85.42 6.44 8.14
## pill: % cycles 14.99 68.35 16.66
## did not enter: % cycles 57.87 16.65 25.48
table_BC = rbind(table_n,table_perc)
o = order(rownames(table_BC), decreasing = TRUE)
table_BC = table_BC[o,]
table_BC## none / condoms pill unclear
## pill: n cycles 215958.00 984776.00 240084.00
## pill: % cycles 14.99 68.35 16.66
## none / condoms: n cycles 895215.00 67507.00 85295.00
## none / condoms: % cycles 85.42 6.44 8.14
## did not enter: n cycles 693365.00 199527.00 305250.00
## did not enter: % cycles 57.87 16.65 25.48
write.csv(table_BC, file = paste0(IO$tables, "Supplementary_Table_BC_table.csv"))g = ggplot(users, aes(x = age, fill = birth_control))
g + geom_histogram(binwidth = 1) + facet_grid(birth_control ~.)g = ggplot(users, aes(x = age, fill = country))
g + geom_histogram(binwidth = 1) + facet_wrap(country ~.)load(file = paste0(IO$out_Rdata, "number_of_users_and_cycles_per_category.Rdata"), verbose = TRUE)## Loading objects:
## agg
g = ggplot(agg, aes(y = BC, x= 0, col = BC)) +
facet_grid(bmi_cat ~ age_cat) +
geom_text(aes(label = n_users)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank()) +
guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
ggtitle("Number of users per category of age (horizontal) and bmi (vertical)")
g## Warning: Removed 16 rows containing missing values (geom_text).
ggsave(filename = paste0(IO$panels, "S1_X_number_of_users_per_cat.pdf"), plot = g)## Saving 7 x 5 in image
## Warning: Removed 16 rows containing missing values (geom_text).
g = ggplot(agg, aes(y = BC, x= 0, col = BC)) +
facet_grid(bmi_cat ~ age_cat) +
geom_text(aes(label = n_cycles)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank()) +
guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
ggtitle("Number of cycles per category of age (horizontal) and bmi (vertical)")
g## Warning: Removed 51 rows containing missing values (geom_text).
ggsave(filename = paste0(IO$panels, "S1_X_number_of_cycles_per_cat.pdf"), plot = g)## Saving 7 x 5 in image
## Warning: Removed 51 rows containing missing values (geom_text).
ju = which(users$BC %in% as.character(par$BC_dict$name))
g = ggplot(users[ju,], aes(x = n_tender_breasts, fill = BC))+
geom_histogram(binwidth = 1, position = "identity", alpha = 0.5)+xlim(c(0,20))
g## Warning: Removed 49439 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
j = which((users$BC %in% as.character(par$BC_dict$name)) &
(users$perc_cycles_with_TB>=0) &
(users$perc_cycles_with_TB<=1))
g = ggplot(users[j,], aes(x = perc_cycles_with_TB, fill = BC))+
geom_histogram(binwidth = 0.1, position = "identity", alpha = 0.5)
gtable_BC_BT = table(users$BC[j], round(users$perc_cycles_with_TB[j],1))
table_BC_BT##
## 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
## none / condoms 15508 10789 11093 8348 8965 8314 8283 7290 9043
## pill 23834 10241 6815 3493 2846 2012 1648 1133 1364
##
## 0.9 1
## none / condoms 7646 6801
## pill 809 859
table_BC_BT_perc = round(100*table_BC_BT/apply(table_BC_BT,1,sum),2)
table_BC_BT_perc##
## 0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8
## none / condoms 15.19 10.57 10.87 8.18 8.78 8.14 8.11 7.14 8.86
## pill 43.29 18.60 12.38 6.34 5.17 3.65 2.99 2.06 2.48
##
## 0.9 1
## none / condoms 7.49 6.66
## pill 1.47 1.56
table_BC_BT_perc_df = as.data.frame(table_BC_BT_perc)
colnames(table_BC_BT_perc_df) = c("BC","perc_cycles_with_TB","perc_users")
g = ggplot(table_BC_BT_perc_df, aes(x = perc_cycles_with_TB,y = perc_users, fill = BC))+
geom_bar(stat = "identity",position = "dodge", alpha = 1)+
xlab("Fraction of cycles with at least one reported breast tenderness")+
ylab("% of users")+
geom_text(data = data.frame(BC = names(table(users$BC[j])),n_users = as.numeric(table(users$BC[j]))),
aes(col = BC, label = paste("tot # users:",n_users),
x = 8,y = 0.6*max(table_BC_BT_perc_df$perc_users)+6*as.numeric(BC)))
gggsave(file = paste0(IO$panels, "perc of users vs perc of cycles with at least one BT.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.4*viz$full_width/2,
scale = viz$scale*0.85)#load(paste0(IO$input_data,"days/days_1.Rdata"), verbose = TRUE)
load(file = paste0(IO$out_Rdata,"sub_days_examples_input_data.Rdata"), verbose = TRUE)## Loading objects:
## sub_days
days = sub_days
user_ids = unique(days$user_id)
for(user_id in user_ids){
j = which((days$user_id == user_id)&(!is.na(days$cycle_id_m)) & (days$category != "pill_hbc") & (days$cycle_nb_m %in% 4:8))
d = days[j,]
g = ggplot_user_example(d = d, title = FALSE, size_factor = 1.5)
print(g)
ggsave(filename = paste0(IO$panels, "example_",d$user_id[1],".pdf"), plot = g, width = viz$full_width/2.8, height = viz$full_width/4)
}rm(days, user_id, user_ids, g, d, j)## Warning in rm(days, user_id, user_ids, g, d, j): object 'd' not found
load(file = paste0(IO$out_Rdata,"tracking_behavior_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)## Loading objects:
## agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])## BC tracking_group tot_n_cycles
## 1 none / condoms menstrual tracking 368956
## 27 pill menstrual tracking 220308
## 53 none / condoms pre-menstrual tracking 254472
## 79 pill pre-menstrual tracking 37063
## 105 none / condoms regular tracking 459701
## 131 pill regular tracking 700152
## 157 none / condoms sporadic tracking 684557
## 183 pill sporadic tracking 263187
agg$SE = agg$fraction_cycles_with_logs_SE
g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_logs, col = BC, shape = BC)) +
geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
geom_ribbon(aes(ymin = fraction_cycles_with_logs-1.96*SE, ymax = fraction_cycles_with_logs+1.96*SE, fill = BC),
alpha = 0.5, col = NA)+
geom_line() + geom_point()+
scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
scale_colour_manual(values= cols$BC , name="BC")+
scale_fill_manual(values= cols$BC , name="BC")+
guides(col = FALSE, fill = FALSE, shape = FALSE)+
xlab("cycleday from 1st day of menstruation") + ylab("% cycles with any log")+
theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
facet_grid( . ~ tracking_group)
gg_n = ggplot(agg, aes(x = cycleday_m_D, y = avg_n_logs, col = BC, shape = BC)) +
geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
geom_line() + geom_point()+
scale_x_continuous(breaks = par$x.axis)+
scale_colour_manual(values= cols$BC , name="BC")+
scale_fill_manual(values= cols$BC , name="BC")+
guides(col = FALSE, fill = FALSE, shape = FALSE)+
xlab("cycleday from 1st day of menstruation") + ylab("average # of logs")+
theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
facet_grid( . ~ tracking_group)+
geom_text(aes(x = par$Df, y = 0+ 0.1*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
hjust = 1, vjust = 1)
g_nggsave(file = paste0(IO$panels, "tracking_profile_perc_cycles_with_logs_per_tracking_group_and_BC.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.3*viz$full_width/2,
scale = viz$scale*1.2)
ggsave(file = paste0(IO$panels, "tracking_profile_avg_log_per_tracking_group_and_BC.pdf"),
plot = g_n,
width = viz$full_width/2,
height = 0.3*viz$full_width/2,
scale = viz$scale*1.2)load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)## Loading objects:
## agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])## BC tracking_group tot_n_cycles
## 1 none / condoms menstrual tracking 368956
## 27 pill menstrual tracking 220308
## 53 none / condoms pre-menstrual tracking 254472
## 79 pill pre-menstrual tracking 37063
## 105 none / condoms regular tracking 459701
## 131 pill regular tracking 700152
## 157 none / condoms sporadic tracking 684557
## 183 pill sporadic tracking 263187
agg$SE = agg$fraction_cycles_with_TB_SE
g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC, shape = BC)) +
geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = BC), alpha = 0.5, col = NA)+
geom_line() + geom_point()+
scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
scale_colour_manual(values= cols$BC , name="BC")+
scale_fill_manual(values= cols$BC , name="BC")+
guides(col = FALSE, fill = FALSE, shape = FALSE)+
xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported BT")+
theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
facet_grid( . ~ tracking_group)+
geom_text(aes(x = -par$D, y = 0.27+ 0.03*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
hjust = 0, vjust = 1)
gggsave(file = paste0(IO$panels, "symptoms_profile_per_tracking_group_and_BC.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.3*viz$full_width/2,
scale = viz$scale*1.2)load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_age_cat_and_birth_control_for_regular_tracking.Rdata"), verbose = TRUE)## Loading objects:
## agg
agg$age_x_BC = interaction(agg$age_cat , agg$BC)
agg$SE = agg$fraction_cycles_with_TB_SE
j = which(agg$age_cat %in% par$age_cat_exclude)
if(length(j)>0){agg = agg[-j,]}
j = which((agg$age_cat == "(35,40]") & (agg$BC == "pill"))
if(length(j)>0){agg = agg[-j,]}
agg = agg[which(agg$tracking_group == "regular tracking"),]
g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = age_x_BC, shape = BC)) +
geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = age_x_BC), alpha = 0.5, col = NA)+
geom_line() + geom_point()+
scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
scale_colour_manual(values= cols$age_x_BC , name="Age group x BC")+
scale_fill_manual(values= cols$age_x_BC , name="Age group x BC")+
guides(col = FALSE, fill = FALSE, shape = FALSE)+
xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported TB")+
theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))#+
#facet_grid(. ~ tracking_group)
gggsave(filename = paste0(IO$panels, "symptoms_profile_by_BC_and_age_cat_for_regular_tracking.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.5*viz$full_width/2,
scale = viz$scale)load(file = paste0(IO$out_Rdata, "avg_symptoms_per_user.Rdata"), verbose = TRUE)## Loading objects:
## agg
g = ggplot(agg[agg$BC %in% c("pill","none / condoms"),], aes(x = n_TB_by_n_cycles, y = perc_users, fill = BC)) +
geom_bar(stat = "identity", position = "dodge" )+
scale_fill_manual(values = cols$BC)+
xlab("# of reported TB symptoms / # of cycles")+
ylab("% of users") + guides(fill = FALSE)
gggsave(filename = paste0(IO$panels, "1_avg_TB_per_user.pdf"), plot = g, width = viz$full_width/4.5, height = viz$full_width/4.5, scale = viz$scale)load(file = paste0(IO$out_Rdata, "n_TB_per_cycle.Rdata"), verbose=TRUE)## Loading objects:
## agg
g = ggplot(agg[agg$BC != "unclear",], aes(x = tender_breasts_ul, y = perc_cycles, fill = BC)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = cols$BC3) + guides(fill = FALSE)+
xlab("# of reported Tender Breast symptoms per cycle")+ ylab("% of cycles")
gggsave(filename = paste0(IO$panels, "1_number_of_TB_per_cycle.pdf"), plot = g, width = viz$full_width/4, height = viz$full_width/4.5, scale = viz$scale)load(file = paste0(IO$out_Rdata, "symptoms_predictors.Rdata"), verbose=TRUE)## Loading objects:
## pred
g_z = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(input ~ .)+
guides(col = FALSE)
g_zg_z = g_z + guides(linetype = FALSE)
g_p = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(input ~ .)+
guides(col = FALSE)
g_pg_p = g_p + guides(linetype = FALSE)
g_z_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(country_name ~ .)+
guides(col = FALSE)
g_z_countriesg_z_countries = g_z_countries + guides(linetype = FALSE)
g_p_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(country_name ~ .)+
guides(col = FALSE)
g_p_countriesg_p_countries = g_p_countries + guides(linetype = FALSE)
ggsave(filename = paste0(IO$panels, "predictors_z.pdf"),
plot = g_z,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_z_countries.pdf"),
plot = g_z_countries,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_p.pdf"),
plot = g_p,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_p_countries.pdf"),
plot = g_p_countries,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time.Rdata"), verbose = TRUE)## Loading objects:
## fraction_TB
load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time_min_n_cycles.Rdata"), verbose = TRUE)## Loading objects:
## min_n_cycles
fraction_TB$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",fraction_TB$init_TB_group))
min_n_cycles$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",min_n_cycles$init_TB_group))
fraction_TB$BC_x_init_TB_group = interaction(fraction_TB$init_TB_group, fraction_TB$BC)
min_n_cycles$BC_x_init_TB_group = interaction(min_n_cycles$init_TB_group, min_n_cycles$BC)
g = ggplot(fraction_TB, aes(x = cycle_nb_m_rel, y = median_n_TB, col = BC_x_init_TB_group, fill = BC_x_init_TB_group)) +
geom_line() +
geom_line(aes(y = avg_n_TB), linetype = 3) +
geom_ribbon(aes(ymin = q_05_n_TB , ymax = q_95_n_TB), alpha = 0.2, col = NA)+
geom_ribbon(aes(ymin = q_25_n_TB , ymax = q_75_n_TB), alpha = 0.2, col = NA)+
geom_text(data = min_n_cycles, aes(label = paste0("min # = ",min_n_cycles)), x = 0.5, y = 0.95*max(fraction_TB$q_95_n_TB), size = 3, vjust = 0, hjust = 0)+
facet_grid(BC ~ init_TB_group_text )+
scale_y_continuous(breaks = seq(0,21, by = 3), minor_breaks = 0:21)+
scale_color_manual(values = cols$BC_x_init_TB_group) + scale_fill_manual(values = cols$BC_x_init_TB_group)+
guides(col = FALSE, fill = FALSE) +
xlab("Cycle number") + ylab("# of reported BT")
gggsave(filename = paste0(IO$panels, "evolution_in_time.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.5*viz$full_width/2,
scale = viz$scale)load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)## Loading objects:
## sel_d
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]
if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)){
g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
g_average = ggplot_imputed_TB(sel_d = sel_d_average,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
}load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)## Loading objects:
## sel_d
user_ids =
c("586e33d674b46555254e908ec42d6771ed294bcd", "ff484ef33fd2c29086617ed63a9814571b86f15d",
"677332825213f1f0cb1e6831ead1fc787cf01663", "8714aad3100409673b8762775e474aff17321df4",
"40620830924a2f02935e3a6c6102f84e5de35fce","6ef947aeb9cfe0468bd1efd26b8a10dca0a1e645")
sel_d = sel_d[which(sel_d$user_id %in% user_ids),]
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]
if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)&(nrow(sel_d_average)>0)){
g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
g_average = ggplot_imputed_TB(sel_d = sel_d_average,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
g_consistency = arrangeGrob(g_inconsistent, g_average, g_consistent, nrow = 1, widths = c(1,1,1))
g_consistency
ggsave(filename = paste0(IO$panels, "consistency_examples.pdf"),
plot = g_consistency,
width = viz$full_width/2,
height = 0.33*viz$full_width/2,
scale = viz$scale*1.7)
}load(file = paste0(IO$out_Rdata,"consistency_summary.Rdata"), verbose = TRUE)## Loading objects:
## df
df$BC = factor(df$BC, levels = c(as.character(par$BC_dict$name),"both"))
df$BC_x_init_TB_group = interaction(df$init_TB_group, df$BC)
g = ggplot(df, aes(x = consistency, y = n_users, fill = BC_x_init_TB_group)) +
geom_area(position = "identity", alpha = 0.75) +
facet_grid(BC ~ . , scale = "free_y")+
scale_fill_manual(values = cols$BC_x_init_TB_group2)+
guides(fill = FALSE)
gggsave(filename = paste0(IO$panels, "consistency_summary.pdf"),
plot = g,
width = viz$full_width/2.5,
height = 0.6*viz$full_width/2.5,
scale = viz$scale)for(bc in c("NC","pill")){
i = (bc == "pill")+1
BC = par$BC_dict$name[i]
load(file = paste0(IO$tmp_data,"user_phenotyping_",bc,".Rdata"), verbose = TRUE)
# screeplot
#eig_df = data.frame(x = 1:10, eig = mds$eig[1:10])
#g = ggplot(eig_df, aes(x = x, y = eig)) + geom_bar(stat = "identity", fill = cols$BC[i])
#g
# ggsave(filename = paste0(IO$panels, "S_screeplot_",bc,".pdf"),
# plot = g,
# width = viz$full_width/3,
# height = 0.75*viz$full_width/3,
# scale = viz$scale)
# interactive 3D
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~n_days,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~time,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~max,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~time_last_symptoms,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
# interactive 2D
plot_ly(mds_df, x = ~X1, y = ~X2,
color = ~X3,
type = "scatter", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X3,
color = ~X2,
type = "scatter", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
if(bc == "pill"){
#mds_df$user_id[grep("65035d",mds_df$user_id)]
user_ids = c("0ceafb77c214fb6e45d069e6c645083a9f7799d0","d0fcb8602f237946a56162f047c8ef3ef254541c",special_pill_user,
"73266275a3d6f3898cf58269070edb4d4edd2034","e610274623760e9c4f1a50e0ed5a00f17117ed0b",
"65035d05a6ccabaaeb82ecd4e17bde1c096adc2f",
"c45cd0e050cf6d704da32cbab899bacf51a519a3")
}else{
user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
"c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
"2eedf35808effb0a40381e11d1c94257096ab8de","cffcd394d8f769d363f67eada7f16b0106c3191a",
"b9590a55307d916d4aabc8e386bc76586a28ae0b")
user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
"cffcd394d8f769d363f67eada7f16b0106c3191a","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
"c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","34c8890de2da03e29a7f93771ea9ecaf12cf4c88",
"2eedf35808effb0a40381e11d1c94257096ab8de")
}
j_mds = which(mds_df$user_id %in% user_ids)
mds_df_subset = mds_df[j_mds,]
mds_df_subset$user_id = factor(as.character(mds_df_subset$user_id), levels = user_ids)
# MDS
g_12 = ggplot(mds_df, aes(x = X1, y = X2)) + coord_fixed()+
geom_point(alpha = 0.5, size = 0.3, col = "gray40")+
geom_point(data = mds_df_subset, aes(x = X1, y = X2, col = user_id), size = 2, alpha = 0.7)+
guides(col = FALSE)
g_13 = ggplot(mds_df, aes(x = X1, y = X3)) + coord_fixed()+
geom_point(alpha = 0.5, size = 0.3, col = "gray40") +
geom_point(data = mds_df_subset, aes(x = X1, y = X3, col = user_id), size = 2, alpha = 0.7)+
guides(col = FALSE)
grid.arrange(g_12, g_13, nrow = 1)
g_mds = arrangeGrob(g_12, g_13, nrow = 1)
g_mds
ggsave(filename = paste0(IO$panels, "mds_",bc,".pdf"),
plot = g_mds,
width = viz$full_width/2,
height = 0.5*viz$full_width/2,
scale = viz$scale)
avg_profiles = this_avg_profile_per_user[which(this_avg_profile_per_user$user_id %in% user_ids),]
c_wide_subset = this_c_wide[which(this_c_wide$user_id %in% user_ids),]
avg_profiles = melt(c_wide_subset)
avg_profiles$avg_tender_breast = avg_profiles$value
avg_profiles$user_id = factor(as.character(avg_profiles$user_id), levels = user_ids)
g_profiles = ggplot(avg_profiles, aes(x = cycleday_m_D, y = avg_tender_breast, fill = user_id, col = user_id))+
#geom_point()+
#geom_line()+
geom_area(position = "identity", alpha = 0.75)+
facet_grid(user_id ~ .)+
guides(col = FALSE, fill = FALSE)+
theme(strip.text = element_blank())+
scale_y_continuous(breaks = c(0,0.5,1))+
scale_x_continuous(breaks = par$x.axis)
g_profiles
ggsave(filename = paste0(IO$panels, "profiles_",bc,".pdf"),
plot = g_profiles,
width = viz$full_width/4,
height = 0.65*viz$full_width/4,
scale = viz$scale*1.5)
}## Loading objects:
## mds_df
## rtsne_out_df
## this_avg_profile_per_user
## this_c_wide
## Loading objects:
## mds_df
## rtsne_out_df
## this_avg_profile_per_user
## this_c_wide
load(file = paste0(IO$out_Rdata,"pill_transition_profiles.Rdata"), verbose = TRUE)## Loading objects:
## transition_profiles
transition_profiles$TB_change_cat_simple = factor(transition_profiles$TB_change_cat_simple , levels = c("increase","no change","decrease"))
agg = unique(transition_profiles[,c("TB_change_cat_simple","transition","n_users")])
agg$cycle_nb_m_from_trans = -4
agg## TB_change_cat_simple transition n_users cycle_nb_m_from_trans
## 1 decrease off pill 534 -4
## 206 increase off pill 989 -4
## 414 no change off pill 232 -4
## 622 decrease on pill 4544 -4
## 830 increase on pill 3165 -4
## 1038 no change on pill 2146 -4
agg_sum = aggregate( n_users ~ transition, agg, FUN = sum )
colnames(agg_sum) = c("transition","n_users_tot")
agg = merge(agg, agg_sum, all = TRUE)
agg$perc_users = paste0(round(100*agg$n_users/agg$n_users_tot),"%")
agg$text = paste0(agg$n_users, "\n",agg$perc_users)
g = ggplot(transition_profiles, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC)) +
geom_line()+ facet_grid(transition + TB_change_cat_simple ~ cycle_nb_m_from_trans) +
scale_x_continuous(breaks = par$x.axis, limits = c(-par$D,par$Df))+ guides(col = FALSE)+
geom_text(data = agg, aes(x = -par$D, y = 0.25*max(transition_profiles$fraction_cycles_with_TB), col = transition, label = text),
nudge_x = 7, nudge_y = 0, size = 3.5)+
scale_color_manual(values = rep(cols$BC,2))+
scale_y_continuous(labels = scales::percent)+
xlab("cycleday from menstruation")+
ylab("% of cycles with reported breast tenderness")
print(g)ggsave(filename = paste0(IO$panels, "profiles_pill_transition.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.85*viz$full_width/2,
scale = viz$scale*1.1)load(file = paste0(IO$out_Rdata,"table_TB_change_cat_x_BC_df.Rdata"), verbose = TRUE)## Loading objects:
## table_TB_change_cat_x_BC_df
load(file = paste0(IO$out_Rdata,"hist_log2_TB_ratio_x_BC.Rdata"), verbose = TRUE)## Loading objects:
## hist_log2_TB_ratio_x_BC
g = ggplot(table_TB_change_cat_x_BC_df, aes(x = TB_change_cat_wrap, y = n_users, fill = BC))+
geom_bar(stat = "identity")+
facet_grid(BC ~., scale = "free")+
xlab("")+ylab("# of users")+
scale_fill_manual(values = cols$BC)
gg_hist = ggplot(hist_log2_TB_ratio_x_BC, aes(x = log2_TB_ratio, y = n_users, fill = transition))+
geom_bar(stat = "identity", width = 0.5)+
facet_grid(transition ~., scale = "free")+
xlab("log2(TB ratio: after/before)")+ylab("# of users")+
#geom_vline(xintercept = c(-value_inf+0.5,-1.5,-0.5,0.5,1.5,value_inf-0.5), col = "gray20", linetype = 2, size = 0.2)+
geom_vline(xintercept = c(-0.5,0.5), col = "gray20", linetype = 2, size = 0.2)+
geom_vline(xintercept = 0, col = "gray20", linetype = 1, size = 0.5)+
scale_fill_manual(values = cols$BC)+
scale_x_continuous(breaks = seq(-6,6,by = 1))
g_histggsave(filename = paste0(IO$panels, "pill_transition_TB_change_cat_x_BC.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.4*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "pill_transition_hist_log2_TB_ratio_x_BC.pdf"),
plot = g_hist,
width = viz$full_width/2,
height = 0.33*viz$full_width/2,
scale = viz$scale)load(file = paste0(IO$out_Rdata,"days_selected_users_for_pill_transition_examples.Rdata"), verbose = TRUE)## Loading objects:
## selected_users_days
selected_users_days$user_id = selected_users_days$stretch_id
stretch_ids = unique(selected_users_days$stretch_id)
for(stretch_id in stretch_ids){
cat("\t\t",stretch_id,"\n")
this_stretch_days = selected_users_days[selected_users_days$stretch_id == stretch_id,]
trans = unique(this_stretch_days$transition)
g = ggplot_user_history(this_stretch_days, pill_transition = trans, print_x_lab = FALSE, print_title = FALSE, make_x_axis_symmetrical = TRUE)
print(g)
ggsave(filename = paste0(IO$panels, "profiles_pill_transition_example_",stretch_id,".pdf"),
plot = g,
width = viz$full_width/2,
height = 0.18*viz$full_width/2,
scale = viz$scale*1.2)
}## user_1
## user_2
## user_3
## user_4
## user_5
## user_6
## user_7
## user_8
## user_9
## user_10